library(broom)
library(dplyr)
library(ebal)
library(fixest)
library(ggplot2)
library(ggrepel)
library(gridExtra)
library(here)
library(kableExtra)
library(knitr)
library(readr)
library(tidyr)
library(wru) # make sure to use 2020 census data, options("wru_data_wd" = TRUE)
library(lubridate)
library(MatchIt)

datadir = here::here('data/appendix')
outputdir = here::here('output/appendix')

round_num <- function(num, digits){
  return(format(round(num, digits), nsmall=digits))
}

authors <- read_csv(file.path(datadir, 'authors_for_analysis_app.csv'))


#############################
# Figure S1
#############################

collab_trend_cn <- read_csv(file.path(datadir, 'yearly_pb_trend_CN.csv'))

collab_trend_cn <- collab_trend_cn |> mutate(across(GB:NL, ~ .x/CN)) |> 
  select(!CN) |> pivot_longer(cols = GB:NL, names_to = "Country", values_to = "val") |>
  mutate(Country = factor(Country , levels=unique(Country)))

p1 <- ggplot(data=collab_trend_cn, aes(x=index, y=val, fill=Country, color=Country)) + 
    geom_point(aes(shape=Country), size=2.8)+ geom_line(linewidth=0.8) + 
    theme_bw() + labs(x='Year', y='Share of Chinese Publications') + 
    scale_shape_manual(values = 1:9)+ geom_vline(xintercept=2018, linetype='longdash') +
    scale_x_continuous(breaks=seq(2010, 2021, 2)) + theme(text = element_text(size = 18))

ggsave(file.path(outputdir, 'yearly_trend_cn.png'), p1)

rm(list=c("collab_trend_cn", "p1"))


#############################
# Figure S2
#############################

gs <- read_csv(file.path(datadir, 'check_sample_gs.csv'))

authors_pub <- authors |>
    select(author_researcher_id, num_range('pub_total_', 2010:2020)) |>
    pivot_longer(
        !author_researcher_id,
        names_to = 'year',
        values_to = 'pubs') |>
    group_by(author_researcher_id) |>
    summarise(di_total_pub = sum(pubs))

authors_cite <- authors |>
    select(author_researcher_id, num_range('cite_total_', 2010:2020)) |>
    pivot_longer(
        !author_researcher_id,
        names_to = 'year',
        names_prefix = 'cite_total_',
        values_to = 'cites') |>
    mutate(year = as.numeric(year)) |>
    group_by(author_researcher_id, year) |>
    summarise(di_total_cite = sum(cites))

gs <- gs |>
    inner_join(authors_pub, by='author_researcher_id') |>
    inner_join(authors_cite, by=c('author_researcher_id', 'year'))

pubs <- gs |> group_by(author_researcher_id) |> slice(1)

p2 <- ggplot(pubs, aes(x=log(totalpubs+1), y=log(di_total_pub+1))) + geom_point() + 
    geom_abline(intercept = 0, slope = 1, linewidth = 0.5, lty='longdash') +
    xlim(1.7, 6) + ylim(1.7, 6) + theme_bw() + 
    labs(x='Log Publications on Google Scholar', y='Log Publications on Dimensions') +
    ggtitle(paste0('Correlation = ', round(cor(pubs$totalpubs, pubs$di_total_pub), 2)))

ggsave(file.path(outputdir, 'validation_pub.png'), p2, width = 7, height = 7)

p3 <- ggplot(gs, aes(x=log(cite+1), y=log(di_total_cite+1))) + geom_point() + 
    geom_abline(intercept = 0, slope = 1, linewidth = 0.5, lty='longdash') +
    theme_bw() + labs(x='Log Citations on Google Scholar', y='Log Citations on Dimensions') +
    ggtitle(paste0('Correlation = ', round(cor(gs$cite, gs$di_total_cite), 2)))

ggsave(file.path(outputdir, 'validation_cite.png'), p3, width = 7, height = 7)

rm(list=c("p2", "p3", "gs", "pubs", "authors_pub", "authors_cite"))


#############################
# Table S1
#############################

authors_main <- authors |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )

w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w
apply(select(authors_main, c(treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian))|>filter(treat==1), 2, mean)
apply(select(authors_main, c(treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian))|>filter(treat==0), 2, weighted.mean, w=w)

pfit <- glm(treat ~ pub_total_1014 + cite_total_1014 + pub_nih_1014 + Asian, authors_main, family = "binomial")
w_ps <- predict(pfit, type = "response")
authors_main$w_ps <- w_ps

match_fit1 <- matchit(treat ~ pub_total_1014 + cite_total_1014 + pub_nih_1014 + Asian, 
                      data = authors_main, method = "nearest", ratio = 5, replace = TRUE)
matched_data1 <- match.data(match_fit1)
summary(match_fit1)

match_fit2 <- matchit(treat ~ pub_total_1014 + cite_total_1014 + pub_nih_1014 + Asian, 
                      data = authors_main, method = "nearest",
                      distance = "mahalanobis", ratio = 5, replace = TRUE)
matched_data2 <- match.data(match_fit2)
summary(match_fit2)


#############################
# Table S2
#############################

outcome_keys <- c('pub_pubmed_', 'cite_pubmed_', 'pub_non_pubmed_', 'cite_non_pubmed_', 'pub_total_', 'cite_total_')

res <- matrix(NA, 27, 6)

for (i in 1:length(outcome_keys)){
  outcome <- paste0(outcome_keys[i], 2015:2021)
  dat <- authors_main[, c("author_researcher_id", "treat", "pub_total_1014", "cite_total_1014", 
           "pub_nih_1014", "Asian", outcome)]
  
  dat$weights <- 1
  dat$weights[dat$treat==0] <- w
  
  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*3
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  out1 <- feols(Y~treatment | period+author_id, data=panel, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')
  
  dat <- matched_data1[, c("author_researcher_id", "treat", "pub_total_1014", "cite_total_1014", 
                          "pub_nih_1014", "Asian", outcome)]
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  out2 <- feols(Y~treatment | period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
  
  dat <- matched_data2[, c("author_researcher_id", "treat", "pub_total_1014", "cite_total_1014", 
                           "pub_nih_1014", "Asian", outcome)]
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  out3 <- feols(Y~treatment | period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+2] <- c(round_num(out3$coefficients, 3), paste0('(', round_num(out3$se, 3), ')'), avg_pre, round_num(r2(out3, type='ar2'), 3),
                                     out3$nobs, 'Y', 'Y', '', '')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 3)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 3, "PubMed Publications" = 3)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) |>
  pack_rows("Panel C", 19, 27)

rm(list=c("authors_main", "dat", "out1", "out2", "out3", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys", "w", "outcome", "w_ps", "pfit",
  "matched_data1", "matched_data2", "match_fit1", "match_fit2"))


#############################
# Table S3
#############################

authors_main <- authors |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )

outcome_keys <- c('pub_pubmed_', 'cite_pubmed_', 'pub_non_pubmed_', 'cite_non_pubmed_', 'pub_total_', 'cite_total_')
res <- matrix(NA, 27, 6)

w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w

authors_main$weights <- 1
authors_main$weights[authors_main$treat==0] <- w

for (i in 1:length(outcome_keys)){
  dat <- authors_main |>
    select(author_researcher_id, treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
           num_range(outcome_keys[i], 2015:2021), weights)
  
  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*3
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = asinh(Y_raw),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  
  out1 <- feols(Y~treatment|period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')
  
  out2 <- feols(Y~treatment|period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
  
  out3 <- feols(Y~treatment|period+author_id, data=panel, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n+2] <- c(round_num(out3$coefficients, 3), paste0('(', round_num(out3$se, 3), ')'), avg_pre, round_num(r2(out3, type='ar2'), 3),
                                     out3$nobs, 'Y', 'Y', '', 'Y')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 3)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 3, "PubMed Publications" = 3)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) |>
  pack_rows("Panel C", 19, 27)

rm(list=c("authors_main", "dat", "out1", "out2", "out3", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys", "w"))


#############################
# Table S4
#############################

authors_main <- authors |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )

outcome_keys <- c('pub_pubmed_', 'cite_pubmed_', 'pub_non_pubmed_', 'cite_non_pubmed_', 'pub_total_', 'cite_total_')
res <- matrix(NA, 27, 6)

w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w

authors_main$weights <- 1
authors_main$weights[authors_main$treat==0] <- w

for (i in 1:length(outcome_keys)){
  dat <- authors_main |>
    select(author_researcher_id, treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
           num_range(outcome_keys[i], 2015:2021), weights)
  
  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*3
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y_raw)) |> unlist() |> round_num(3)
  
  out1 <- fepois(Y_raw~treatment|period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='pr2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')
  
  out2 <- fepois(Y_raw~treatment|period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='pr2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
  
  out3 <- fepois(Y_raw~treatment|period+author_id, data=panel, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n+2] <- c(round_num(out3$coefficients, 3), paste0('(', round_num(out3$se, 3), ')'), avg_pre, round_num(r2(out3, type='pr2'), 3),
                                     out3$nobs, 'Y', 'Y', '', 'Y')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 3)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 3, "PubMed Publications" = 3)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) |>
  pack_rows("Panel C", 19, 27)

rm(list=c("authors_main", "dat", "out1", "out2", "out3", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys", "w"))


#############################
# Table S5
#############################

authors_main <- authors |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )
w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w

outcome_keys <- c('cite_avg_', 'cite_pubmed_avg_', 'cite_ratio_', 'num_hits_')
labs <- c("Average Citations", "Average PubMed Citations", 
          "Average Citation Ratio", "Number of Hit Papers")

res <- matrix(NA, 18, 6)

for (i in 1:length(outcome_keys)){
  
  outcome <- paste0(outcome_keys[i], 2015:2021)
  dat <- authors_main[, c("author_researcher_id", "treat", "pub_total_1014", "cite_total_1014", 
                          "pub_nih_1014", "Asian", "cite_avg_1014", outcome)]

  dat$weights <- 1
  dat$weights[dat$treat==0] <- w
  

  row_switch <- c(0, 0, 1, 1)
  col_switch <- c(0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*3
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  out1 <- feols(Y~treatment | period+author_id, data=panel, cluster=~author_id)

  out1 <- feols(Y~ i(period, treat, 2018)|period+author_id,
                data=panel, cluster=~author_id)
  out1 <- tidy(out1)
  out1[5:7, ] <- out1[4:6,] # add 2018
  out1$term <- c(2015:2021)
  out1[out1$term==2018,2:5] <- 0
  out1$lower <- with(out1, estimate-1.96*std.error)
  out1$upper <- with(out1, estimate+1.96*std.error)
  out1[out1$term==2018, 6:7] <- NA

  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)

  out1 <- feols(Y~treatment|period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')

  out2 <- feols(Y~treatment|period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')

  out3 <- feols(Y~treatment|period+author_id, data=panel, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n+2] <- c(round_num(out3$coefficients, 3), paste0('(', round_num(out3$se, 3), ')'), avg_pre, round_num(r2(out3, type='ar2'), 3),
                                     out3$nobs, 'Y', 'Y', '', 'Y')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 2)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 3, "PubMed Publications" = 3)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18)

rm(list=c("authors_main", "dat", "out1", "out2", "out3", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys", "w", "labs", "outcome"))


#############################
# Table S6
#############################

authors_main <- authors |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_top_100_{2015:2021}") - 
      across(num_range('pub_top_100_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_top_100_{2015:2021}") - 
      across(num_range('cite_top_100_', 2015:2021))
    )

outcome_keys <- c('pub_top_100_', 'cite_top_100_', 'pub_non_top_100_', 'cite_non_top_100_')
res <- matrix(NA, 18, 6)

w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w

authors_main$weights <- 1
authors_main$weights[authors_main$treat==0] <- w

for (i in 1:length(outcome_keys)){
  dat <- authors_main |>
    select(author_researcher_id, treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
           num_range(outcome_keys[i], 2015:2021), weights)

  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*3
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  
  out1 <- feols(Y~treatment|period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')
  
  out2 <- feols(Y~treatment|period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
  
  out3 <- feols(Y~treatment|period+author_id, data=panel, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n+2] <- c(round_num(out3$coefficients, 3), paste0('(', round_num(out3$se, 3), ')'), avg_pre, round_num(r2(out3, type='ar2'), 3),
                                     out3$nobs, 'Y', 'Y', '', 'Y')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 2)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) 

rm(list=c("authors_main", "dat", "out1", "out2", "out3", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys", "w"))


#############################
# Table S7
#############################
# here we only provide the estimates from the paper level analysis for data privacy reasons
pap <- read_csv(file.path(datadir, 'table_s7_estimates.csv'))

rm(list=c("pap"))


#############################
# Table S8
#############################

authors_no_covid <- read_csv(file.path(datadir, 'authors_for_analysis_covid.csv'))

authors_main <- authors_no_covid |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )

outcome_keys <- c('pub_pubmed_', 'cite_pubmed_', 'pub_non_pubmed_', 'cite_non_pubmed_', 'pub_total_', 'cite_total_')
res <- matrix(NA, 27, 6)

w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w

authors_main$weights <- 1
authors_main$weights[authors_main$treat==0] <- w

for (i in 1:length(outcome_keys)){
  dat <- authors_main |>
    select(author_researcher_id, treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
           num_range(outcome_keys[i], 2015:2021), weights)
  
  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*3
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  
  out1 <- feols(Y~treatment|period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')
  
  out2 <- feols(Y~treatment|period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
  
  out3 <- feols(Y~treatment|period+author_id, data=panel, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n+2] <- c(round_num(out3$coefficients, 3), paste0('(', round_num(out3$se, 3), ')'), avg_pre, round_num(r2(out3, type='ar2'), 3),
                                     out3$nobs, 'Y', 'Y', '', 'Y')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 3)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 3, "PubMed Publications" = 3)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) |>
  pack_rows("Panel C", 19, 27)

rm(list=c("authors_main", "dat", "out1", "out2", "out3", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys", "w", "authors_no_covid"))


###########################
# Table S9
###########################

authors_main <- authors |>
  mutate(
    share = log(cn_collab_1014+1),    
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )

outcome_keys <- c('pub_pubmed_', 'cite_pubmed_', 'pub_non_pubmed_', 'cite_non_pubmed_', 'pub_total_', 'cite_total_')
res <- matrix(NA, 27, 4)

for (i in 1:length(outcome_keys)){
  dat <- authors_main |>
    select(author_researcher_id, treat, share, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
           num_range(outcome_keys[i], 2015:2021))
  
  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*2
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * share,
      author_id = as.character(author_researcher_id)
    )
  
  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  
  out1 <- feols(Y~treatment|period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')
  
  out2 <- feols(Y~treatment|period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 3)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 2, "PubMed Publications" = 2)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) |>
  pack_rows("Panel C", 19, 27)

rm(list=c("authors_main", "dat", "out1", "out2", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys"))


#############################
# Table S10
#############################

app_fund <- authors |>
  mutate(
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_nih_{2015:2021}") - 
      across(num_range('cite_nih_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_cn_funded_{2015:2021}") - 
      across(num_range('cite_cn_funded_', 2015:2021))
    )

outcome_keys <- c('cite_total_', 'cite_nih_', 'cite_non_nih_', 'cite_cn_funded_', 'cite_non_cn_funded_')

res <- matrix(NA, 8, 5)
for (i in 1:length(outcome_keys)){
    dat <- app_fund |>
        select(author_researcher_id, treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
            num_range(outcome_keys[i], 2015:2021))

    panel <- dat |>
        pivot_longer(
            cols = num_range(outcome_keys[i], 2015:2021),
            names_to = "year",
            names_prefix=outcome_keys[i],
            values_to = "Y_raw") |>
        mutate(
            period = rep(2015:2021, n()/7), 
            Y = log(Y_raw+1),
            post_treatment = as.numeric(period >= 2019),
            treatment = post_treatment * treat,
            author_id = as.character(author_researcher_id)
        )

    avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)

    out <- feols(Y~treatment|
        period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)

    res[, i] <- c(round_num(out$coefficients, 3), paste0('(', round_num(out$se, 3), ')'), avg_pre, round_num(r2(out, type='ar2'), 3),
        out$nobs, 'Y', 'Y', 'Y')
}

row_names <- c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
    'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year')

res <- cbind(row_names, res)
colnames(res) <- c('', 'All', 'NIH-Funded', 'Non NIH-Funded', 'China-Funded', 'Non China-Funded')

kbl(res, booktabs = T, format='latex') |>
    kable_styling(latex_options = c("scale_down"), position = "center") |>
    add_header_above(c("", "Citation Effects by Nature of Publication" = 5))

rm(list=c("app_fund", "dat", "out", "res", "panel", "avg_pre", "i", "row_names", "outcome_keys"))


#############################
# Tablee S11
#############################

app_collab <- authors |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_us_{2015:2021}") - 
      across(num_range('china_collab_', 2015:2021)) - across(num_range('other_collab_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_us_{2015:2021}") - 
      across(num_range('cite_china_collab_', 2015:2021)) - across(num_range('cite_other_collab_', 2015:2021))
    )

outcome_keys <- c('pub_us_', 'other_collab_', 'china_collab_', 'cite_us_', 'cite_other_collab_', 'cite_china_collab_')

res <- matrix(NA, 8, 6)
for (i in 1:length(outcome_keys)){
    dat <- app_collab |>
        select(author_researcher_id, treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
            num_range(outcome_keys[i], 2015:2021))

    panel <- dat |>
        pivot_longer(
            cols = num_range(outcome_keys[i], 2015:2021),
            names_to = "year",
            names_prefix=outcome_keys[i],
            values_to = "Y_raw") |>
        mutate(
            period = rep(2015:2021, n()/7), 
            Y = log(Y_raw+1),
            post_treatment = as.numeric(period >= 2019),
            treatment = post_treatment * treat,
            author_id = as.character(author_researcher_id)
        )

    avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)

    out <- feols(Y~treatment|
        period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)

    res[, i] <- c(round_num(out$coefficients, 3), paste0('(', round_num(out$se, 3), ')'), avg_pre, round_num(r2(out, type='ar2'), 3),
        out$nobs, 'Y', 'Y', 'Y')
}

row_names <- c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
    'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE')

res <- cbind(row_names, res)
colnames(res) <- c('', 'U.S. Publications', 'Intl (Non-China) Publications', 'China-Collab Publications', 
    'U.S. Citations', 'Intl (Non-China) Citations', 'China-Collab Citations')

kbl(res, booktabs = T, format='latex') |>
    kable_styling(latex_options = c("scale_down"), position = "center") |>
    add_header_above(c("", "Effects by Nature of Publication" = 4))

rm(list=c("app_collab", "dat", "out", "res", "panel", "avg_pre", "i", "row_names", "outcome_keys"))


#############################
# Table S12
#############################

m_bf5 <- read_csv(file.path(datadir, "migration_nonus_before_2018_0_5.csv"))
m_bf75 <- read_csv(file.path(datadir, "migration_nonus_before_2018_0_75.csv"))
m_af5 <- read_csv(file.path(datadir, "migration_nonus_after_2018_0_5.csv"))
m_af75 <- read_csv(file.path(datadir, "migration_nonus_after_2018_0_75.csv"))

authors |> select(author_researcher_id, treat) |> 
  left_join(m_bf5, by="author_researcher_id") |> 
  mutate(migrated = ifelse(is.na(perc), 0, 1)) |>
  group_by(treat) |>
  summarize(sum(migrated), sum(migrated)/n())

authors |> select(author_researcher_id, treat) |> 
  left_join(m_bf75, by="author_researcher_id") |> 
  mutate(migrated = ifelse(is.na(perc), 0, 1)) |>
  group_by(treat) |>
  summarize(sum(migrated), sum(migrated)/n())

authors |> select(author_researcher_id, treat) |> 
  left_join(m_af5, by="author_researcher_id") |> 
  mutate(migrated = ifelse(is.na(perc), 0, 1)) |>
  group_by(treat) |>
  summarize(sum(migrated), sum(migrated)/n())

authors |> select(author_researcher_id, treat) |> 
  left_join(m_af75, by="author_researcher_id") |> 
  mutate(migrated = ifelse(is.na(perc), 0, 1)) |>
  group_by(treat) |>
  summarize(sum(migrated), sum(migrated)/n())


#############################
# Tablee S13
#############################

m <- rbind(m_bf5, m_af5)
authors_wo_m <- authors |> left_join(m, by="author_researcher_id") |> mutate(migrated = ifelse(is.na(perc), 0, 1)) |> 
  filter(migrated==0)

authors_main <- authors_wo_m |>
  mutate(
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )

outcome_keys <- c('pub_pubmed_', 'cite_pubmed_', 'pub_non_pubmed_', 'cite_non_pubmed_', 'pub_total_', 'cite_total_')
res <- matrix(NA, 27, 6)

w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w

authors_main$weights <- 1
authors_main$weights[authors_main$treat==0] <- w

for (i in 1:length(outcome_keys)){
  dat <- authors_main |>
    select(author_researcher_id, treat, pub_total_1014, cite_total_1014, pub_nih_1014, Asian,
           num_range(outcome_keys[i], 2015:2021), weights)
  
  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*3
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  avg_pre <- panel |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  
  out1 <- feols(Y~treatment|period+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')
  
  out2 <- feols(Y~treatment|period+period[pub_total_1014]+period[cite_total_1014]+period[pub_nih_1014]+period[Asian]+author_id, data=panel, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
  
  out3 <- feols(Y~treatment|period+author_id, data=panel, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n+2] <- c(round_num(out3$coefficients, 3), paste0('(', round_num(out3$se, 3), ')'), avg_pre, round_num(r2(out3, type='ar2'), 3),
                                     out3$nobs, 'Y', 'Y', '', 'Y')
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2', 
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE', 
                   'Entropy Balancing'), 3)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 3, "PubMed Publications" = 3)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) |>
  pack_rows("Panel C", 19, 27)

rm(list=c("authors_main", "dat", "out1", "out2", "out3", "res", "panel", "avg_pre", "row_switch",
  "col_switch", "row_n", "col_n", "i", "row_names", "outcome_keys", "w", "m", "m_bf5", "m_bf75", 
  "m_af5", "m_af75", "authors_wo_m"))


#############################
# Table S14
#############################

outcome_keys <- c('pub_pubmed_', 'cite_pubmed_', 'pub_non_pubmed_', 'cite_non_pubmed_', 'pub_total_', 'cite_total_')

authors_main <- authors |>
  mutate(
    share = log(cn_collab_1014+1),    
    across(num_range('pub_total_', 2015:2021), .names = "pub_non_pubmed_{2015:2021}") - 
      across(num_range('pub_pubmed_', 2015:2021)),
    across(num_range('cite_total_', 2015:2021), .names = "cite_non_pubmed_{2015:2021}") - 
      across(num_range('cite_pubmed_', 2015:2021))
    )

w <- ebalance(X=select(authors_main, c(pub_total_1014, cite_total_1014, pub_nih_1014, Asian)),
              Treatment=authors_main$treat)$w

authors_main$weights <- 1
authors_main$weights[authors_main$treat==0] <- w

res <- matrix(NA, 27, 4)
for (i in 1:length(outcome_keys)){
  dat <- authors_main |>
    select(author_researcher_id, treat, pub_total_1014, cite_total_1014, 
           pub_nih_1014, Asian, num_range(outcome_keys[i], 2015:2021))
  
  dat$weights <- 1
  dat$weights[dat$treat==0] <- w
  
  row_switch <- c(0, 0, 1, 1, 2, 2)
  col_switch <- c(0, 1, 0, 1, 0, 1)
  row_n <- 1 + row_switch[i]*9
  col_n <- 1 + col_switch[i]*2
  
  panel <- dat |>
    pivot_longer(
      cols = num_range(outcome_keys[i], 2015:2021),
      names_to = "year",
      names_prefix=outcome_keys[i],
      values_to = "Y_raw") |>
    mutate(
      period = rep(2015:2021, n()/7), 
      Y = log(Y_raw+1),
      post_treatment = as.numeric(period >= 2019),
      treatment = post_treatment * treat,
      author_id = as.character(author_researcher_id)
    )
  
  panel_h <- panel[panel$cite_total_1014 >= 672, ]
  panel_l <- panel[panel$cite_total_1014 < 672, ]

  avg_pre1 <- panel_h |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  out1 <- feols(Y~treatment | period+author_id, data=panel_h, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n] <- c(round_num(out1$coefficients, 3), paste0('(', round_num(out1$se, 3), ')'), avg_pre1, round_num(r2(out1, type='ar2'), 3),
                                   out1$nobs, 'Y', 'Y', '', '')

  avg_pre2 <- panel_l |> filter(treat==1 & post_treatment==0) |> summarise(avg = mean(Y)) |> unlist() |> round_num(3)
  out2 <- feols(Y~treatment | period+author_id, data=panel_l, weights=~weights, cluster=~author_id)
  res[row_n:(row_n+8), col_n+1] <- c(round_num(out2$coefficients, 3), paste0('(', round_num(out2$se, 3), ')'), avg_pre2, round_num(r2(out2, type='ar2'), 3),
                                     out2$nobs, 'Y', 'Y', 'Y', '')
  
}

row_names <- rep(c('Ties to China × Post', '', 'Pre-treatment avg.', 'R2',
                   'No. of obs.', 'Scholar FE', 'Year FE', 'Baseline Covariates*Year FE',
                   'Entropy Balancing'), 3)

res <- cbind(row_names, res)
colnames(res) <- NULL

kbl(res, booktabs = T, format='latex') |>
  kable_styling(latex_options = c("scale_down"), position = "center") |>
  add_header_above(c("", "PubMed Publications" = 3, "PubMed Publications" = 3)) |>
  add_header_above(c("", "(1)", "(2)", "(3)", "(4)", "(5)", "(6)")) |>
  pack_rows("Panel A", 1, 9) |>
  pack_rows("Panel B", 10, 18) |>
  pack_rows("Panel C", 19, 27)


#############################
# Table S15
#############################

# Data is not released for privacy reasons


#############################
# Table S16
#############################
fields_meta <- read_csv(file.path(datadir, 'fields_for_analysis_meta.csv'))
fields_meta <- fields_meta |> filter(num_total >= 50000)
fields_meta[order(fields_meta$pct_us_cn_colab, decreasing=TRUE), ]
fields_meta[order(fields_meta$pct_nih, decreasing=TRUE), ]

















